In [8]:
# Load the "autoreload" extension
%load_ext autoreload
# always reload modules marked with "%aimport"
%autoreload 1
import os
import sys
# add the project's src/data directory to the path so local modules can be imported
src_dir = os.path.join(os.getcwd(), os.pardir, 'src', 'data')
sys.path.append(src_dir)
In [9]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from sklearn import metrics
from tqdm import tqdm
from scipy.ndimage.filters import gaussian_filter1d
from numpy.random import poisson
In [3]:
interim_dir = os.path.join(os.getcwd(), os.pardir, 'data', 'interim')
figures_dir = os.path.join(os.getcwd(), os.pardir, 'reports', 'figures')
In [4]:
%aimport scrape_buda
In [75]:
from scrape_buda import BudaRating
In [76]:
ratings = scrape_buda.BudaRating()
prefix = os.path.join(interim_dir, 'data20160521')
ratings.load_buda(prefix)
In [77]:
ratings.predicted_rating()
In [70]:
ratings.allteams[ratings.allteams['teamid'] == 39878]
Out[70]:
In [59]:
ratings.check_league_type(39878)
In [78]:
ratings.allteams = pd.read_csv(os.path.join(interim_dir, 'withselfcaptainensemble_ratings_numbers.csv'))
In [37]:
ratings.allteams.to_csv(os.path.join(interim_dir, 'withselfcaptainensemble_ratings_numbers.csv'))
In [87]:
year_index = (ratings.allteams['year'] == 2015) & (ratings.allteams['type'] == 'Hat')
this_year = ratings.allteams[year_index]
In [89]:
len(this_year)
Out[89]:
In [91]:
96 * 10
Out[91]:
In [32]:
sph_index = (ratings.allteams['season'] == 'Spring') & \
(ratings.allteams['type'] == 'Hat') & \
(ratings.allteams['divname'] == 'JP Mixed (4/3)') & \
(ratings.allteams['year'] >= 2010)
sph = ratings.allteams[sph_index]
In [58]:
sns.set_context('poster')
sns.set_style('white')
fig, ax = plt.subplots(figsize=(6, 5))
sns.distplot(sph['plusminus'], kde=False, bins=range(-12,12))
plt.ylabel('Number of Teams')
plt.xlabel('Average Plus/Minus per Game')
plt.tight_layout()
plt.savefig(os.path.join(figures_dir, 'PlusMinusDistribution'))
In [94]:
whoa = (sph['plusminus']) >= 5
len(sph[whoa])
Out[94]:
In [93]:
whoa = (sph['plusminus']) <= -5
len(sph[whoa])
Out[93]:
In [29]:
len(sph)
Out[29]:
In [30]:
171 / 7.
Out[30]:
In [31]:
sph.head()
Out[31]:
In [32]:
30/171.
Out[32]:
In [42]:
years = range(2010, 2017)
for year in years:
    sph_index1 = (ratings.allteams['season'] == 'Spring') & \
                 (ratings.allteams['type'] == 'Hat') & \
                 (ratings.allteams['divname'] == 'JP Mixed (4/3)') & \
                 (ratings.allteams['year'] == year)
    sph1 = ratings.allteams[sph_index1]
    print(year, len(sph1))
In [43]:
year = 2014
sph_index1 = (ratings.allteams['season'] == 'Spring') & \
(ratings.allteams['type'] == 'Hat') & \
(ratings.allteams['divname'] == 'JP Mixed (4/3)') & \
(ratings.allteams['year'] == year)
sph1 = ratings.allteams[sph_index1]
In [44]:
sph1
Out[44]:
In [45]:
11/7.
Out[45]:
There is one missing team in 2014: Team 17, Flesh Eating Virus. They went 5-2 with a total plus/minus of +11, so a per-game plus/minus of about +1.57. No need to worry about this missing data point.
If I know the average number of goals scored in a game and the length of each game, I can estimate the average scoring rate.
Suppose the average number of goals scored per game is 18 and the game length is 70 minutes.
In [17]:
goalpermin = 18/70.
goalpermin
Out[17]:
The average scoring rate is then about 0.257 goals per minute.
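A quick analytic check (an added aside, not one of the original cells): under this model the per-game total is the sum of 70 independent Poisson(goalpermin) minutes, so it is itself Poisson with mean 18 and standard deviation sqrt(18) ≈ 4.2 goals. That sets the spread to expect in the simulated histogram a couple of cells below.
In [ ]:
# Mean and standard deviation of the per-game goal total under the Poisson model.
mean_total = goalpermin * 70
std_total = np.sqrt(goalpermin * 70)
mean_total, std_total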
In [13]:
ok = [poisson(goalpermin, 70).sum() for i in range(171)]
In [14]:
sns.distplot(ok, kde=False)
Out[14]:
That looks like a pretty reasonable distribution of point totals per game.
Next, the question we are really interested in: suppose you are on a team with an average plus/minus of -5 per game. If the halves are split evenly in time, then at halftime you are typically behind by about 2.5 points, with only 9 or so more points left to be played.
I am missing something here. Need some measure of the variance of plus/minus values. Time to sleep on it.
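A rough sketch of the halftime question in the meantime (an added cell, assuming for simplicity that both teams score at the league-average rate in the second half): how often does the trailing team win the remaining 35 minutes by 3 or more goals, enough to overturn an average 2.5-goal deficit?
In [ ]:
# Hypothetical sketch: simulate 35 remaining minutes with each team scoring at
# half the league-average rate, and count how often the trailing team outscores
# the leader by 3 or more goals.
half_rate = goalpermin / 2
n_sim = 10000
comebacks = 0
for _ in range(n_sim):
    us = poisson(half_rate, 35).sum()
    them = poisson(half_rate, 35).sum()
    if us - them >= 3:
        comebacks += 1
comebacks / float(n_sim)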
In [15]:
18 / 70.
Out[15]:
In [16]:
0.257 / 2
Out[16]:
An average team playing against an average team has an expected goal-scoring rate of 0.257 / 2 ≈ 0.13 goals/minute. A team that averages -5 plus/minus per game gives up 2.5 more goals per 70 minutes and scores 2.5 fewer goals per 70 minutes when playing an average team. (The cells below actually use an asymmetric split of that deficit, 3.5 fewer goals scored and 1.5 more allowed per game, which still works out to -5.)
In [95]:
gpm1 = 18 / 2 - 3.5
In [96]:
gpm2 = 18/2 + 1.5
In [98]:
gpm1 / 70.
Out[98]:
In [99]:
gpm2/70.
Out[99]:
In [19]:
ok1 = [poisson(gpm1) for i in range(171)]
In [20]:
ok2 = [poisson(gpm2) for i in range(171)]
In [21]:
sns.distplot(ok1, kde=False, bins=range(25))
sns.distplot(ok2, kde=False, bins=range(25))
Out[21]:
In [22]:
okdiff = np.array(ok1) - np.array(ok2)
oksum = np.array(ok1) + np.array(ok2)
In [23]:
sns.distplot(okdiff, kde=False)
Out[23]:
In [24]:
sns.distplot(oksum)
Out[24]:
In [25]:
np.mean(okdiff)
Out[25]:
In [26]:
ok = [poisson(gpm1, 70).sum() + poisson(gpm2, 70).sum() for i in range(171)]
In [27]:
np.mean(ok)
Out[27]:
In [11]:
def underdogwin(gpm1, gpm2, remaining_time, thresh):
    # Simulate 100 games of (70 - remaining_time) minutes at per-minute scoring
    # rates gpm1 and gpm2, and count how many times team 1 finishes more than
    # `thresh` goals ahead of team 2 (a count out of 100, i.e. roughly a percent).
    # (The plots below flip the x-axis so the index maps back to time remaining.)
    ok1 = [poisson(gpm1, 70 - remaining_time).sum() for i in range(100)]
    ok2 = [poisson(gpm2, 70 - remaining_time).sum() for i in range(100)]
    okdiff = np.array(ok1) - np.array(ok2)
    over_thresh = okdiff > thresh
    return len(okdiff[over_thresh])
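A quick spot check of the helper (an added cell, not part of the original run): with equal scoring rates and the full 70 minutes simulated (remaining_time=0), how many of the 100 simulated games does team 1 win by more than 2 goals?
In [ ]:
# Equal per-minute scoring rates, full game simulated, margin threshold of 2.
underdogwin(18 / 2 / 70., 18 / 2 / 70., 0, 2)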
In [29]:
nsim = 100
sim_come0 = []
sim_come1 = []
sim_come2 = []
sim_come3 = []
for isim in tqdm(range(nsim)):
    come_from_behind0 = []
    come_from_behind1 = []
    come_from_behind2 = []
    come_from_behind3 = []
    remaining_times = range(70)
    for remaining_time in remaining_times:
        gpm1 = 18 / 2 / 70.
        gpm2 = 18 / 2 / 70.
        thresh = 0
        wins = underdogwin(gpm1, gpm2, remaining_time, thresh)
        come_from_behind0.append(wins)
        thresh = 1
        wins = underdogwin(gpm1, gpm2, remaining_time, thresh)
        come_from_behind1.append(wins)
        thresh = 2
        wins = underdogwin(gpm1, gpm2, remaining_time, thresh)
        come_from_behind2.append(wins)
        thresh = 3
        wins = underdogwin(gpm1, gpm2, remaining_time, thresh)
        come_from_behind3.append(wins)
    sim_come0.append(come_from_behind0)
    sim_come1.append(come_from_behind1)
    sim_come2.append(come_from_behind2)
    sim_come3.append(come_from_behind3)
come_mean0 = np.mean(sim_come0, axis=0)
come_mean1 = np.mean(sim_come1, axis=0)
come_mean2 = np.mean(sim_come2, axis=0)
come_mean3 = np.mean(sim_come3, axis=0)
come_std0 = np.std(sim_come0, axis=0)
come_std1 = np.std(sim_come1, axis=0)
come_std2 = np.std(sim_come2, axis=0)
come_std3 = np.std(sim_come3, axis=0)
In [12]:
nsim = 100
sim_comes = []
threshes = range(4)
for thresh in threshes:
    sim_come = []
    for isim in tqdm(range(nsim)):
        come_from_behind = []
        remaining_times = range(70)
        for remaining_time in remaining_times:
            gpm1 = 18 / 2 / 70.
            gpm2 = 18 / 2 / 70.
            wins = underdogwin(gpm1, gpm2, remaining_time, thresh)
            come_from_behind.append(wins)
        sim_come.append(come_from_behind)
    sim_comes.append(sim_come)
come_mean0 = np.mean(sim_comes[0], axis=0)
come_mean1 = np.mean(sim_comes[1], axis=0)
come_mean2 = np.mean(sim_comes[2], axis=0)
come_mean3 = np.mean(sim_comes[3], axis=0)
come_std0 = np.std(sim_comes[0], axis=0)
come_std1 = np.std(sim_comes[1], axis=0)
come_std2 = np.std(sim_comes[2], axis=0)
come_std3 = np.std(sim_comes[3], axis=0)
In [13]:
nsim = 100
sim_comes = []
threshes = range(4)
for thresh in threshes:
    sim_come = []
    for isim in tqdm(range(nsim)):
        come_from_behind = []
        remaining_times = range(70)
        for remaining_time in remaining_times:
            gpm1 = 18 / 2 / 70. - 3.5 / 70
            gpm2 = 18 / 2 / 70. + 1.5 / 70
            wins = underdogwin(gpm1, gpm2, remaining_time, thresh)
            come_from_behind.append(wins)
        sim_come.append(come_from_behind)
    sim_comes.append(sim_come)
come_mean0_bad = np.mean(sim_comes[0], axis=0)
come_mean1_bad = np.mean(sim_comes[1], axis=0)
come_mean2_bad = np.mean(sim_comes[2], axis=0)
come_mean3_bad = np.mean(sim_comes[3], axis=0)
come_std0_bad = np.std(sim_comes[0], axis=0)
come_std1_bad = np.std(sim_comes[1], axis=0)
come_std2_bad = np.std(sim_comes[2], axis=0)
come_std3_bad = np.std(sim_comes[3], axis=0)
In [14]:
sns.set_context('poster')
sns.set_style('white')
f, ax = plt.subplots(figsize=(6, 5))
xarr = np.arange(len(come_mean2_bad), 0, -1)
yarr = gaussian_filter1d(come_mean2, 3)
sigarr = gaussian_filter1d(come_std2, 3)
y1 = yarr - sigarr
y2 = yarr + sigarr
# plt.fill_between(xarr, y1, y2, color='gray', alpha=0.4)
plt.plot(xarr, yarr, label='Team A equal to Team B')
yarr = gaussian_filter1d(come_mean2_bad, 3)
sigarr = gaussian_filter1d(come_std2_bad, 3)
y1 = yarr - sigarr
y2 = yarr + sigarr
# plt.fill_between(xarr, y1, y2, color='gray', alpha=0.4)
plt.plot(xarr, yarr, label='Team A much worse than Team B')
plt.ylim([0, 100])
plt.xlim([70,0])
plt.xlabel('Time Remaining [minutes]')
plt.ylabel('Percent Chance Team A Wins By Three')
plt.legend()
plt.tight_layout()
plt.savefig(os.path.join(figures_dir, 'WinBy3Probability'))
# plt.plot(come_mean_bad)
In [223]:
sns.set_context('poster')
# xarr = range(len(come_mean2))
# yarr = gaussian_filter1d(come_mean2, 3)
# sigarr = gaussian_filter1d(come_std2, 3)
# y1 = yarr - sigarr
# y2 = yarr + sigarr
# plt.fill_between(xarr, y1, y2, color='gray', alpha=0.5)
# plt.plot(xarr, yarr)
xarr = range(len(come_mean0_bad))
yarr = gaussian_filter1d(come_mean0_bad, 3)
sigarr = gaussian_filter1d(come_std0_bad, 3)
y1 = yarr - sigarr
y2 = yarr + sigarr
plt.fill_between(xarr, y1, y2, color='gray', alpha=0.5)
plt.plot(xarr, yarr)
# plt.plot(come_mean_bad)
Out[223]:
In [10]:
sph_index = (ratings.allteams['season'] == 'Spring') & \
(ratings.allteams['type'] == 'Hat') & \
(ratings.allteams['divname'] == 'JP Mixed (4/3)') & \
(ratings.allteams['year'] >= 2010)
sph = ratings.allteams[sph_index]
In [84]:
sns.distplot(sph['n_exp_rating'], kde=False, bins=10)
sns.distplot(sph['n_cap_rating'], kde=False, bins=10)
sns.distplot(sph['n_capexp_rating'], kde=False, bins=10)
Out[84]:
In [80]:
sph['n_exp_rating'].mean()
Out[80]:
In [81]:
sph['n_cap_rating'].mean()
Out[81]:
In [85]:
sph['n_capexp_rating'].mean()
Out[85]:
In [82]:
sph['n_exp_rating'].median()
Out[82]:
In [83]:
sph['n_cap_rating'].median()
Out[83]:
In [86]:
sph['n_capexp_rating'].median()
Out[86]:
In [70]:
.75*16
Out[70]:
In [71]:
.69*16
Out[71]:
In [87]:
.84*16
Out[87]:
Simulation of two evenly matched teams: what is the distribution of 171 observed average point differentials, each averaged over a 7-game season?
In [88]:
goalpermin
Out[88]:
In [23]:
avgoff = []
for isim in range(171):
    ok1 = [poisson(goalpermin/2, 70).sum() for i in range(7)]
    ok2 = [poisson(goalpermin/2, 70).sum() for i in range(7)]
    off = np.array(ok1) - np.array(ok2)
    avgoff.append(off.mean())
In [109]:
sns.set_context('poster')
sns.set_style('white')
fig, ax = plt.subplots(figsize=(6, 5))
sns.distplot(avgoff, kde=False, bins=range(-10, 11), color='gray', label='Equal Skill')
sns.distplot(sph['plusminus'], kde=False, bins=range(-10,11), label='Observed')
plt.ylabel('Number of Teams')
plt.xlabel('Average Plus/Minus per Game')
plt.tight_layout()
plt.legend()
plt.savefig(os.path.join(figures_dir, 'PlusMinusDistribution'))
Given equal teams, what is the likelihood of winning fewer than 10% of your games, i.e. going winless over a 7-game season?
In [18]:
wins = []
for isim in range(171):
    ok1 = [poisson(goalpermin/2, 70).sum() for i in range(7)]
    ok2 = [poisson(goalpermin/2, 70).sum() for i in range(7)]
    off = np.array(ok1) - np.array(ok2)
    win_index = off > 0
    wins.append(len(off[win_index]))
In [19]:
sns.distplot(wins, kde=False, bins=range(8))
Out[19]:
In [120]:
np.array(wins).sum()
Out[120]:
In [121]:
7 * 171
Out[121]:
In [122]:
564 / 1197.
Out[122]:
In [123]:
1/7.
Out[123]:
In [130]:
n1 = len(np.array(wins)[np.array(wins) < 1])
In [131]:
n1
Out[131]:
In [132]:
2 / 171.
Out[132]:
In [28]:
sns.set_context('poster')
sns.set_style('white')
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
ax = axes[0]
sns.distplot(avgoff, kde=False, bins=range(-10, 11), color='gray', label='Equal Skill', ax=ax)
sns.distplot(sph['plusminus'], kde=False, bins=range(-10,11), label='Observed', ax=ax)
ax.set_ylabel('Number of Teams')
ax.set_xlabel('Average Plus/Minus per Game')
ax.legend()
ax = axes[1]
xarr = np.arange(len(come_mean2_bad), 0, -1)
yarr = gaussian_filter1d(come_mean2, 3)
sigarr = gaussian_filter1d(come_std2, 3)
y1 = yarr - sigarr
y2 = yarr + sigarr
# plt.fill_between(xarr, y1, y2, color='gray', alpha=0.4)
ax.plot(xarr, yarr, label='Team A equal to Team B')
yarr = gaussian_filter1d(come_mean2_bad, 3)
sigarr = gaussian_filter1d(come_std2_bad, 3)
y1 = yarr - sigarr
y2 = yarr + sigarr
# plt.fill_between(xarr, y1, y2, color='gray', alpha=0.4)
ax.plot(xarr, yarr, label='Team A much worse than Team B')
ax.set_ylim([0, 100])
ax.set_xlim([70,0])
ax.set_xlabel('Time Remaining [minutes]')
ax.set_ylabel('Percent Chance Team A Wins By Three')
ax.legend()
plt.tight_layout(w_pad=2)
plt.savefig(os.path.join(figures_dir, 'PlusMinusDistribution_WinBy3Probability'))
# plt.plot(come_mean_bad)
An alternative way of investigating the probability of winning one game out of 7 or fewer: simple counting. There are 7 equally likely win/loss sequences with exactly one win and 1 sequence with no wins, out of 2**7 = 128 possible sequences.
In [3]:
ways_to_win_one_or_less = 7 + 1
In [7]:
total_possible_outcomes = 2. ** 7
In [8]:
probability = ways_to_win_one_or_less / total_possible_outcomes
In [9]:
probability
Out[9]:
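The same number can be read directly off the binomial CDF; a minimal cross-check (an added cell), assuming scipy.stats is available:
In [ ]:
from scipy.stats import binom
# P(at most 1 win in 7 games) when each game is a 50/50 coin flip:
# (C(7,0) + C(7,1)) / 2**7 = 8 / 128 = 0.0625
binom.cdf(1, 7, 0.5)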
In [10]:
1 / 7.
Out[10]:
In [13]:
7 * 5 * 4 / 3 / 2. / 2**7
Out[13]:
In [14]:
corepart = 1.396 + 1.476 + 1.484 + 1.363 + 1.082 + 1.038
In [16]:
corepart / 6.
Out[16]:
In [17]:
lcp = 786. + 800 + 819 + 899 + 1032 + 947
lcp / 6
Out[17]:
In [18]:
rcp = 342. + 306 + 361 + 427 + 440 + 431
rcp / 6
Out[18]:
In [19]:
(13651 - 8467) / 8467.
Out[19]:
In [20]:
8467 * 1.6
Out[20]:
In [12]:
7/11.
Out[12]:
In [26]:
leaguelist = {'alabama': 3, 'arkansas': 3, 'alaska': 3, 'arizona': 3, 'california': 10, 'colorado': 7, 'connecticut': 4,
'delaware': 1, 'florida': 9, 'georgia': 5, 'hawaii': 1, 'idaho': 1, 'illinois': 4, 'indiana': 8, 'iowa': 5,
'kansas': 4, 'kentucky': 2, 'louisiana': 1, 'maine': 3, 'maryland': 5, 'massachusetts': 2,
'michigan': 6, 'minnesota': 2, 'mississippi': 2, 'missouri': 2, 'montana': 2, 'nebraska': 2, 'nevada': 2,
'new hampshire': 1, 'new jersey': 5, 'new mexico': 3, 'new york': 11, 'north carolina': 8, 'north dakota': 0,
'ohio': 4, 'oklahoma': 1, 'oregon': 6, 'pennsylvania': 6, 'rhode island': 1, 'south carolina': 5, 'south dakota': 1,
'tennessee': 4, 'texas': 6, 'utah': 4, 'vermont': 2, 'virginia': 6,
'washington': 3, 'west virginia': 0, 'wisconsin': 3, 'wyoming': 0}
In [27]:
totleagues = 0
for key in leaguelist:
    totleagues += leaguelist[key]
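Equivalently (an added aside), the dictionary values can be summed directly:
In [ ]:
sum(leaguelist.values())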
In [28]:
totleagues
Out[28]:
In [29]:
yarr
In [1]:
200/60.
Out[1]:
In [2]:
1/3. * (1800 + 1580 + 1400)
Out[2]:
In [3]:
16/6.
Out[3]:
In [4]:
.84*16
Out[4]:
In [6]:
import seaborn as sns
In [10]:
current_palette = sns.color_palette()
sns.palplot(current_palette)
In [11]:
current_palette
Out[11]:
In [ ]: